<!doctype html>
<html lang="zh-Hans">
 <head>
  <meta charset="utf-8"/>
  <!-- Viewport: fit the device width but leave pinch-zoom enabled so
       readers can enlarge the article text. -->
  <meta content="width=device-width, initial-scale=1" name="viewport"/>
  <title>
   小白如何入门 Python 爬虫？  | 数螺 | NAUT IDEA
  </title>
  <!-- Bootstrap core must load before the optional theme: the theme only
       overrides core rules and loses the cascade if it is listed first. -->
  <link href="http://cdn.bootcss.com/bootstrap/3.3.6/css/bootstrap.min.css" rel="stylesheet"/>
  <link href="http://cdn.bootcss.com/bootstrap/3.3.6/css/bootstrap-theme.min.css" rel="stylesheet"/>
  <style type="text/css">
   /* Article body: typography for the content placed inside #xmain. */
   #xmain img {
     display: block;
     max-width: 100%;
     margin-top: 10px;
     margin-bottom: 10px;
   }

   #xmain p {
     margin-top: 20px;
     font-size: 16px;
     line-height: 150%;
   }

   #xmain h2 { font-size: 24px; }
   #xmain h3 { font-size: 20px; }
   #xmain h4 { font-size: 18px; }

   /* Banner (.header): white text on a blue bar. */
   .header {
     margin-bottom: 20px;
     color: #fff;
     background-color: #09f;
   }

   /* Paragraphs in the banner sit inline so they line up with the logo. */
   .header p {
     display: inline-block;
     margin: 0;
     padding: 10px 0;
     font-size: 16px;
     vertical-align: middle;
   }

   .header a { color: white; }
   .header img { height: 25px; }
  </style>
  <script src="http://cdn.bootcss.com/jquery/3.0.0/jquery.min.js">
  </script>
  <script src="http://nautstatic-10007657.file.myqcloud.com/static/css/readability.min.js" type="text/javascript">
  </script>
  <script type="text/javascript">
   // Once the DOM is ready, run Readability over a clone of the page and
   // replace the contents of #xmain with the extracted article markup.
   $(document).ready(function() {
     // Location descriptor of the original article, handed to Readability
     // as its first constructor argument.
     var uri = {
       spec: "http://dataunion.org/14510.html",
       host: "http://dataunion.org",
       prePath: "http://dataunion.org",
       scheme: "http",
       pathBase: "http://dataunion.org/"
     };

     // Nothing to rewrite if the article container is missing.
     var target = document.getElementById("xmain");
     if (!target) {
       return;
     }

     // Parse a deep clone rather than the live document, so whatever
     // Readability does to its input never touches the rendered page.
     var documentClone = document.cloneNode(true);
     var article = new Readability(uri, documentClone).parse();

     // parse() may return null when no article can be extracted; in that
     // case keep the existing markup instead of throwing on article.content.
     if (article && article.content) {
       target.innerHTML = article.content;
     }
   });
  </script>
  <!-- 1466459530: Accept with keywords: (title(0.4):Python,社区,数盟,入门,爬虫, topn(0.3):社区,入门,复杂度,国内新闻,行业资讯,队例,内存,数据挖掘,机器,Python,学会,职业规划,效率,豆瓣,基础架构,文章,网页,python,数盟,地址,链接,人民日报,代码,爬虫,搜索引擎,集群,链向,页面,机子,分布式).-->
 </head>
 <body onload="">
  <!-- Site banner: logo and site name both link to /databee; the slogan
       column carries Bootstrap's hidden-xs class. -->
  <div class="header">
   <div class="container">
    <div class="row">
     <div class="col-xs-6 col-sm-6 text-left">
      <a href="/databee">
       <!-- The image is the only content of this link, so its alt text
            supplies the link's accessible name. -->
       <img alt="数螺" src="http://nautidea-10007657.cos.myqcloud.com/logo_white.png"/>
      </a>
      <a href="/databee">
       <p>
        数螺
       </p>
      </a>
     </div>
     <div class="hidden-xs col-sm-6 text-right">
      <p>
       致力于数据科学的推广和知识传播
      </p>
     </div>
    </div>
   </div>
  </div>
  <!-- Article headline. It sits outside #xmain, so it is not affected when
       the ready handler overwrites that container's innerHTML. -->
  <div class="container text-center">
   <h1>
    小白如何入门 Python 爬虫？
   </h1>
  </div>
  <div class="container" id="xmain">
   ﻿﻿
   <title>
    小白如何入门 Python 爬虫？ | 数盟社区
   </title>
   <!-- All in One SEO Pack 2.2.7.6.2 by Michael Torbert of Semper Fi Web Design[32,64] -->
   <!-- /all in one seo pack -->
   <!--
<div align="center">
<a href="http://strata.oreilly.com.cn/hadoop-big-data-cn?cmp=mp-data-confreg-home-stcn16_dataunion_pc" target="_blank"><img src="http://dataunion.org/wp-content/uploads/2016/05/stratabj.jpg"/ ></a>
</div>
-->
   <header id="header-web">
    <div class="header-main">
     <hgroup class="logo">
      <h1>
       <a href="http://dataunion.org/" rel="home" title="数盟社区">
        <img src="http://dataunion.org/wp-content/themes/yzipi/images/logo.png"/>
       </a>
      </h1>
     </hgroup>
     <!--logo-->
     <nav class="header-nav">
      <ul class="menu" id="menu-%e4%b8%bb%e8%8f%9c%e5%8d%95">
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-has-children menu-item-71" id="menu-item-71">
        <a href="http://dataunion.org/category/events" title="events">
         活动
        </a>
        <ul class="sub-menu">
         <li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-22457" id="menu-item-22457">
          <a href="http://dataunion.org/2016timeline">
           2016档期
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-22459" id="menu-item-22459">
          <a href="http://dataunion.org/category/parterc">
           合作会议
          </a>
         </li>
        </ul>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category current-post-ancestor current-menu-parent current-post-parent menu-item-has-children menu-item-20869" id="menu-item-20869">
        <a href="http://dataunion.org/category/tech" title="articles">
         文章
        </a>
        <ul class="sub-menu">
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-20867" id="menu-item-20867">
          <a href="http://dataunion.org/category/tech/base" title="base">
           基础架构
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-3302" id="menu-item-3302">
          <a href="http://dataunion.org/category/tech/ai" title="ai">
           人工智能
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-3303" id="menu-item-3303">
          <a href="http://dataunion.org/category/tech/analysis" title="analysis">
           数据分析
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-21920" id="menu-item-21920">
          <a href="http://dataunion.org/category/tech/dm">
           数据挖掘
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-3314" id="menu-item-3314">
          <a href="http://dataunion.org/category/tech/viz" title="viz">
           可视化
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-3305" id="menu-item-3305">
          <a href="http://dataunion.org/category/tech/devl" title="devl">
           编程语言
          </a>
         </li>
        </ul>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-has-children menu-item-20876" id="menu-item-20876">
        <a href="http://dataunion.org/category/industry">
         行业
        </a>
        <ul class="sub-menu">
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-16328" id="menu-item-16328">
          <a href="http://dataunion.org/category/industry/case" title="case">
           行业应用
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-2112" id="menu-item-2112">
          <a href="http://dataunion.org/category/industry/demo" title="demo">
           Demo展示
          </a>
         </li>
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-21562" id="menu-item-21562">
          <a href="http://dataunion.org/category/industry/news">
           行业资讯
          </a>
         </li>
        </ul>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-311" id="menu-item-311">
        <a href="http://dataunion.org/category/sources" title="sources">
         资源
        </a>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-20870" id="menu-item-20870">
        <a href="http://dataunion.org/category/books" title="book">
         图书
        </a>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-21363" id="menu-item-21363">
        <a href="http://dataunion.org/category/training">
         课程
        </a>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-has-children menu-item-21853" id="menu-item-21853">
        <a href="http://dataunion.org/category/jobs">
         职位
        </a>
        <ul class="sub-menu">
         <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-22050" id="menu-item-22050">
          <a href="http://dataunion.org/category/career">
           职业规划
          </a>
         </li>
        </ul>
       </li>
      </ul>
     </nav>
     <!--header-nav-->
    </div>
   </header>
   <!--header-web-->
   <div id="main">
    <div id="soutab">
     <form action="http://dataunion.org/" class="search" method="get">
     </form>
    </div>
    <div id="container">
     <nav id="mbx">
      当前位置：
      <a href="http://dataunion.org">
       首页
      </a>
      &gt;
      <a href="http://dataunion.org/category/tech">
       文章
      </a>
      &gt;  正文
     </nav>
     <!--mbx-->
     <article class="content">
      <header class="contenttitle">
       <div class="mscc">
        <h1 class="mscctitle">
         <a href="http://dataunion.org/14510.html">
          小白如何入门 Python 爬虫？
         </a>
        </h1>
        <address class="msccaddress ">
         <em>
          2,623 次阅读 -
         </em>
         <a href="http://dataunion.org/category/tech" rel="category tag">
          文章
         </a>
        </address>
       </div>
      </header>
      <div class="content-text">
       <p>
        作者：
        <a data-tip="p$t$xie-ke-41" href="http://www.zhihu.com/people/xie-ke-41">
         谢科
        </a>
        ，
        <strong class="zu-question-my-bio" title="用python分布式地爬过豆瓣/Twitter Search">
         用python分布式地爬过豆瓣/Twitter Search
        </strong>
       </p>
       <p>
        “入门”是良好的动机，但是可能作用缓慢。如果你手里或者脑子里有一个项目，那么实践起来你会被目标驱动，而不会像学习模块一样慢慢学习。
       </p>
       <p>
        另外如果说知识体系里的每一个知识点是图里的点，依赖关系是边的话，那么这个图一定不是一个有向无环图。因为学习A的经验可以帮助你学习B。因此，你不需要学习怎么样“入门”，因为这样的“入门”点根本不存在！你需要学习的是怎么样做一个比较大的东西，在这个过程中，你会很快地学会需要学会的东西的。当然，你可以争论说需要先懂python，不然怎么学会python做爬虫呢？但是事实上，你完全可以在做这个爬虫的过程中学习python 😀
       </p>
       <p>
        看到前面很多答案都讲的“术”——用什么软件怎么爬，那我就讲讲“道”和“术”吧——爬虫怎么工作以及怎么在python实现。
       </p>
       <p>
        先长话短说summarize一下：
        <br/>
        你需要学习
       </p>
       <ol>
        <li>
         基本的爬虫工作原理
        </li>
        <li>
         基本的http抓取工具，scrapy
        </li>
        <li>
         Bloom Filter:
         <a class=" wrap external" href="http://billmill.org/bloomfilter-tutorial/" rel="nofollow noreferrer" target="_blank">
          Bloom Filters by Example
          <i class="icon-external">
          </i>
         </a>
        </li>
        <li>
         如果需要大规模网页抓取，你需要学习分布式爬虫的概念。其实没那么玄乎，你只要学会怎样维护一个所有集群机器能够有效分享的分布式队列就好。最简单的实现是python-rq:
         <a class=" external" href="https://github.com/nvie/rq" rel="nofollow noreferrer" target="_blank">
          <span class="invisible">
           https://
          </span>
          <span class="visible">
           github.com/nvie/rq
          </span>
          <i class="icon-external">
          </i>
         </a>
        </li>
        <li>
         rq和Scrapy的结合：
         <a class=" wrap external" href="https://github.com/darkrho/scrapy-redis" rel="nofollow noreferrer" target="_blank">
          darkrho/scrapy-redis · GitHub
          <i class="icon-external">
          </i>
         </a>
        </li>
        <li>
         后续处理，网页析取(
         <a class=" wrap external" href="https://github.com/grangier/python-goose" rel="nofollow noreferrer" target="_blank">
          grangier/python-goose · GitHub
          <i class="icon-external">
          </i>
         </a>
         )，存储(Mongodb)
        </li>
       </ol>
       <p>
        以下是短话长说：
       </p>
       <p>
        说说当初写的一个集群爬下整个豆瓣的经验吧。
       </p>
       <p>
        1）首先你要明白爬虫怎样工作。
        <br/>
        想象你是一只蜘蛛，现在你被放到了互联“网”上。那么，你需要把所有的网页都看一遍。怎么办呢？没问题呀，你就随便从某个地方开始，比如说人民日报的首页，这个叫initial pages，用$表示吧。
       </p>
       <p>
        在人民日报的首页，你看到那个页面引向的各种链接。于是你很开心地从首页爬到了“国内新闻”那个页面。太好了，这样你就已经爬完了俩页面（首页和国内新闻）！暂且不用管爬下来的页面怎么处理的，你就想象你把这个页面完完整整抄成了个html放到了你身上。
       </p>
       <p>
        突然你发现，在国内新闻这个页面上，有一个链接链回“首页”。作为一只聪明的蜘蛛，你肯定知道你不用爬回去的吧，因为你已经看过了啊。所以，你需要用你的脑子，存下你已经看过的页面地址。这样，每次看到一个可能需要爬的新链接，你就先查查你脑子里是不是已经去过这个页面地址。如果去过，那就别去了。
       </p>
       <p>
        好的，理论上如果所有的页面可以从initial page到达的话，那么可以证明你一定可以爬完所有的网页。
       </p>
       <p>
        那么在python里怎么实现呢？
        <br/>
        很简单
       </p>
       <div class="highlight">
        <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
        <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-5768658459ee0988402459" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
         <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <span class="crayon-title">
          </span>
          <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
           <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-wrap-button" title="切换自动换行">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-expand-button" title="点击展开代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-copy-button" title="复制代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
          </div>
         </div>
         <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
         </div>
         <div class="crayon-plain-wrap">
          <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
import Queue

initial_page = "http://www.renminribao.com"

url_queue = Queue.Queue()
seen = set()

seen.insert(initial_page)
url_queue.put(initial_page)

while(True): #一直进行直到海枯石烂
    if url_queue.size()&gt;0:
        current_url = url_queue.get()    #拿出队列中第一个的url
        store(current_url)               #把这个url代表的网页存储好
        for next_url in extract_urls(current_url): #提取把这个url里链向的url
            if next_url not in seen:
                seen.put(next_url)
                url_queue.put(next_url)
    else:
        break
          </textarea>
         </div>
         <div class="crayon-main" style="">
          <table class="crayon-table">
           <tbody>
            <tr class="crayon-row">
             <td class="crayon-nums " data-settings="show">
              <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
               <div class="crayon-num" data-line="crayon-5768658459ee0988402459-1">
                1
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-5768658459ee0988402459-2">
                2
               </div>
               <div class="crayon-num" data-line="crayon-5768658459ee0988402459-3">
                3
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-5768658459ee0988402459-4">
                4
               </div>
               <div class="crayon-num" data-line="crayon-5768658459ee0988402459-5">
                5
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-5768658459ee0988402459-6">
                6
               </div>
               <div class="crayon-num" data-line="crayon-5768658459ee0988402459-7">
                7
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-5768658459ee0988402459-8">
                8
               </div>
               <div class="crayon-num" data-line="crayon-5768658459ee0988402459-9">
                9
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-5768658459ee0988402459-10">
                10
               </div>
               <div class="crayon-num" data-line="crayon-5768658459ee0988402459-11">
                11
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-5768658459ee0988402459-12">
                12
               </div>
               <div class="crayon-num" data-line="crayon-5768658459ee0988402459-13">
                13
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-5768658459ee0988402459-14">
                14
               </div>
               <div class="crayon-num" data-line="crayon-5768658459ee0988402459-15">
                15
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-5768658459ee0988402459-16">
                16
               </div>
               <div class="crayon-num" data-line="crayon-5768658459ee0988402459-17">
                17
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-5768658459ee0988402459-18">
                18
               </div>
               <div class="crayon-num" data-line="crayon-5768658459ee0988402459-19">
                19
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-5768658459ee0988402459-20">
                20
               </div>
               <div class="crayon-num" data-line="crayon-5768658459ee0988402459-21">
                21
               </div>
              </div>
             </td>
             <td class="crayon-code">
              <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
               <div class="crayon-line" id="crayon-5768658459ee0988402459-1">
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "kn"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-v">
                 import
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "nn"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-v">
                 Queue
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-5768658459ee0988402459-2">
               </div>
               <div class="crayon-line" id="crayon-5768658459ee0988402459-3">
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "n"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-v">
                 initial_page
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "o"
                </span>
                <span class="crayon-o">
                 &gt;=
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "s"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-s">
                 "http://www.renminribao.com"
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-5768658459ee0988402459-4">
               </div>
               <div class="crayon-line" id="crayon-5768658459ee0988402459-5">
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "n"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-v">
                 url_queue
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "o"
                </span>
                <span class="crayon-o">
                 &gt;=
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "n"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-v">
                 Queue
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "o"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "n"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-v">
                 Queue
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "p"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-5768658459ee0988402459-6"><!--
                 Listing line 6: seen = set()
                 The scraped markup displayed escaped Pygments span tags as visible text;
                 only the Python tokens they wrapped are kept here.
                 Comment joiners between the spans keep source indentation out of the
                 rendered code line. Single spaces are assumed where the original
                 whitespace width was lost to reformatting.
               --><span class="crayon-v">seen</span><!--
               --><span class="crayon-h"> </span><!--
               --><span class="crayon-o">=</span><!--
               --><span class="crayon-h"> </span><!--
               --><span class="crayon-e">set</span><!--
               --><span class="crayon-sy">(</span><!--
               --><span class="crayon-sy">)</span></div>
               <div class="crayon-line" id="crayon-5768658459ee0988402459-7">
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-5768658459ee0988402459-8"><!--
                 Listing line 8: seen.insert(initial_page)
                 The scraped markup displayed escaped Pygments span tags as visible text;
                 only the Python tokens they wrapped are kept here, exactly as the
                 article wrote them.
                 Comment joiners between the spans keep source indentation out of the
                 rendered code line.
               --><span class="crayon-v">seen</span><!--
               --><span class="crayon-sy">.</span><!--
               --><span class="crayon-e">insert</span><!--
               --><span class="crayon-sy">(</span><!--
               --><span class="crayon-v">initial_page</span><!--
               --><span class="crayon-sy">)</span></div>
               <div class="crayon-line" id="crayon-5768658459ee0988402459-9"><!--
                 Listing line 9: url_queue.put(initial_page)
                 The scraped markup displayed escaped Pygments span tags as visible text;
                 only the Python tokens they wrapped are kept here.
                 Comment joiners between the spans keep source indentation out of the
                 rendered code line.
               --><span class="crayon-v">url_queue</span><!--
               --><span class="crayon-sy">.</span><!--
               --><span class="crayon-e">put</span><!--
               --><span class="crayon-sy">(</span><!--
               --><span class="crayon-v">initial_page</span><!--
               --><span class="crayon-sy">)</span></div>
               <div class="crayon-line crayon-striped-line" id="crayon-5768658459ee0988402459-10">
               </div>
               <div class="crayon-line" id="crayon-5768658459ee0988402459-11"><!--
                 Listing line 11: while(True): followed by a trailing Python comment.
                 The scraped markup displayed escaped Pygments span tags as visible text,
                 and the comment token had a literal closing span tag fused onto its end;
                 only the Python tokens are kept here. The comment wording is page
                 content and is reproduced unchanged.
                 Comment joiners between the spans keep source indentation out of the
                 rendered code line. A single space before the comment is assumed; the
                 original whitespace width was lost to reformatting.
               --><span class="crayon-st">while</span><!--
               --><span class="crayon-sy">(</span><!--
               --><span class="crayon-t">True</span><!--
               --><span class="crayon-sy">)</span><!--
               --><span class="crayon-o">:</span><!--
               --><span class="crayon-h"> </span><!--
               --><span class="crayon-c">#一直进行直到海枯石烂</span></div>
               <div class="crayon-line crayon-striped-line" id="crayon-5768658459ee0988402459-12"><!--
                 Listing line 12: if url_queue.size()>0: (body of the while loop)
                 The scraped markup displayed escaped Pygments span tags as visible text,
                 and the greater-than operator was escaped once too often, so it was
                 split into three tokens and shown as entity text; it is a single
                 operator token again here.
                 Comment joiners between the spans keep source indentation out of the
                 rendered code line. Four spaces of Python indentation are assumed; the
                 original whitespace width was lost to reformatting.
               --><span class="crayon-h">    </span><!--
               --><span class="crayon-st">if</span><!--
               --><span class="crayon-h"> </span><!--
               --><span class="crayon-v">url_queue</span><!--
               --><span class="crayon-sy">.</span><!--
               --><span class="crayon-e">size</span><!--
               --><span class="crayon-sy">(</span><!--
               --><span class="crayon-sy">)</span><!--
               --><span class="crayon-o">&gt;</span><!--
               --><span class="crayon-cn">0</span><!--
               --><span class="crayon-o">:</span></div>
               <div class="crayon-line" id="crayon-5768658459ee0988402459-13">
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "n"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-v">
                 current_url
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "o"
                </span>
                <span class="crayon-o">
                 &gt;=
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "n"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-v">
                 url_queue
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "o"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "n"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-v">
                 get
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "p"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "c"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-p">
                 #拿出队列中第一个的url&lt;/span&gt;
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-5768658459ee0988402459-14">
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "n"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-v">
                 store
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "p"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "n"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-v">
                 current_url
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "p"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "c"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-p">
                 #把这个url代表的网页存储好&lt;/span&gt;
                </span>
               </div>
               <div class="crayon-line" id="crayon-5768658459ee0988402459-15">
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "k"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-st">
                 for
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "n"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-v">
                 next_url
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "ow"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-st">
                 in
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "n"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-v">
                 extract_urls
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "p"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "n"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-v">
                 current_url
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "p"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "c"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-p">
                 #提取这个url里链向的url&lt;/span&gt;
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-5768658459ee0988402459-16">
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "k"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-st">
                 if
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "n"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-v">
                 next_url
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "ow"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-st">
                 not
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "ow"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-st">
                 in
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "n"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-v">
                 seen
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "p"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 :
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-h">
                </span>
               </div>
               <div class="crayon-line" id="crayon-5768658459ee0988402459-17">
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "n"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-v">
                 seen
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "o"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "n"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-v">
                 put
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "p"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "n"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-v">
                 next_url
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "p"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-5768658459ee0988402459-18">
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "n"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-v">
                 url_queue
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "o"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "n"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-v">
                 put
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "p"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "n"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-v">
                 next_url
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-e">
                 span
                </span>
                <span class="crayon-t">
                 class
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-s">
                 "p"
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-o">
                 &lt;
                </span>
                <span class="crayon-o">
                 /
                </span>
                <span class="crayon-v">
                 span
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
               </div>
               <div class="crayon-line" id="crayon-5768658459ee0988402459-19">
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 else
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-5768658459ee0988402459-20">
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 break
                </span>
               </div>
               <div class="crayon-line" id="crayon-5768658459ee0988402459-21">
               </div>
              </div>
             </td>
            </tr>
           </tbody>
          </table>
         </div>
        </div>
        <!-- [Format Time: 0.0979 seconds] -->
       </div>
       <p>
        写得已经很伪代码了。
       </p>
       <p>
        所有的爬虫的backbone都在这里，下面分析一下为什么爬虫事实上是个非常复杂的东西——搜索引擎公司通常有一整个团队来维护和开发。
       </p>
       <p>
        2）效率
        <br/>
        如果你直接加工一下上面的代码直接运行的话，你需要一整年才能爬下整个豆瓣的内容。更别说Google这样的搜索引擎需要爬下全网的内容了。
       </p>
       <p>
        问题出在哪呢？需要爬的网页实在太多太多了，而上面的代码太慢太慢了。设想全网有N个网站，那么分析一下判重的复杂度就是N*log(N)，因为所有网页要遍历一次，而每次判重用set的话需要log(N)的复杂度。OK，OK，我知道python的set实现是hash——不过这样还是太慢了，至少内存使用效率不高。
       </p>
       <p>
        通常的判重做法是怎样呢？
        <b>
         Bloom Filter
        </b>
        . 简单讲它仍然是一种hash的方法，但是它的特点是，它可以使用固定的内存（不随url的数量而增长）以O(1)的效率判定url是否已经在set中。可惜天下没有白吃的午餐，它的唯一问题在于，如果这个url不在set中，BF可以100%确定这个url没有看过。但是如果这个url在set中，它会告诉你：这个url应该已经出现过，不过我有2%的不确定性。注意这里的不确定性在你分配的内存足够大的时候，可以变得很小很少。一个简单的教程:
        <a class=" wrap external" href="http://billmill.org/bloomfilter-tutorial/" rel="nofollow noreferrer" target="_blank">
         Bloom Filters by Example
         <i class="icon-external">
         </i>
        </a>
       </p>
       <p>
        注意到这个特点，url如果被看过，那么可能以小概率重复看一看（没关系，多看看不会累死）。但是如果没被看过，一定会被看一下（这个很重要，不然我们就要漏掉一些网页了！）。 [IMPORTANT: 此段有问题，请暂时略过]
       </p>
       <p>
        好，现在已经接近处理判重最快的方法了。另外一个瓶颈——你只有一台机器。不管你的带宽有多大，只要你的机器下载网页的速度是瓶颈的话，那么你只有加快这个速度。用一台机子不够的话——用很多台吧！当然，我们假设每台机子都已经尽了最大的效率——使用多线程（python的话，多进程吧）。
       </p>
       <p>
        3）集群化抓取
        <br/>
        爬取豆瓣的时候，我总共用了100多台机器昼夜不停地运行了一个月。想象如果只用一台机子你就得运行100个月了…
       </p>
       <p>
        那么，假设你现在有100台机器可以用，怎么用python实现一个分布式的爬取算法呢？
       </p>
       <p>
        我们把这100台中的99台运算能力较小的机器叫作slave，另外一台较大的机器叫作master，那么回顾上面代码中的url_queue，如果我们能把这个queue放到这台master机器上，所有的slave都可以通过网络跟master联通，每当一个slave完成下载一个网页，就向master请求一个新的网页来抓取。而每次slave新抓到一个网页，就把这个网页上所有的链接送到master的queue里去。同样，bloom filter也放到master上，但是现在master只发送确定没有被访问过的url给slave。Bloom Filter放到master的内存里，而被访问过的url放到运行在master上的Redis里，这样保证所有操作都是O(1)。（至少平摊是O(1)，Redis的访问效率见:
        <a class=" wrap external" href="http://redis.io/commands/linsert" rel="nofollow noreferrer" target="_blank">
         LINSERT – Redis
         <i class="icon-external">
         </i>
        </a>
        ）
       </p>
       <p>
        考虑如何用python实现：
        <br/>
        在各台slave上装好scrapy，那么各台机子就变成了一台有抓取能力的slave，在master上装好Redis和rq用作分布式队列。
       </p>
       <p>
        代码于是写成
       </p>
       <div class="highlight">
        <!-- Crayon Syntax Highlighter v_2.7.2_beta -->
        <div class="crayon-syntax crayon-theme-classic crayon-font-monaco crayon-os-pc print-yes notranslate" data-settings=" minimize scroll-mouseover" id="crayon-5768658459ef7529587794" style=" margin-top: 12px; margin-bottom: 12px; font-size: 12px !important; line-height: 15px !important;">
         <div class="crayon-toolbar" data-settings=" mouseover overlay hide delay" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
          <span class="crayon-title">
          </span>
          <div class="crayon-tools" style="font-size: 12px !important;height: 18px !important; line-height: 18px !important;">
           <div class="crayon-button crayon-nums-button" title="切换是否显示行编号">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-plain-button" title="纯文本显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-wrap-button" title="切换自动换行">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-expand-button" title="点击展开代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-copy-button" title="复制代码">
            <div class="crayon-button-icon">
            </div>
           </div>
           <div class="crayon-button crayon-popup-button" title="在新窗口中显示代码">
            <div class="crayon-button-icon">
            </div>
           </div>
          </div>
         </div>
         <div class="crayon-info" style="min-height: 16.8px !important; line-height: 16.8px !important;">
         </div>
         <div class="crayon-plain-wrap">
          <textarea class="crayon-plain print-no" data-settings="dblclick" readonly="" style="-moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4; font-size: 12px !important; line-height: 15px !important;" wrap="soft">
           #slave.py

current_url = request_from_master()
to_send = []
for next_url in extract_urls(current_url):
    to_send.append(next_url)

store(current_url);
send_to_master(to_send)

#master.py
distributed_queue = DistributedQueue()
bf = BloomFilter()

initial_pages = "www.renmingribao.com"

while(True):
    if request == 'GET':
        if distributed_queue.size()&gt;0:
            send(distributed_queue.get())
        else:
            break
    elif request == 'POST':
        bf.put(request.url)
          </textarea>
         </div>
         <div class="crayon-main" style="">
          <table class="crayon-table">
           <tbody>
            <tr class="crayon-row">
             <td class="crayon-nums " data-settings="show">
              <div class="crayon-nums-content" style="font-size: 12px !important; line-height: 15px !important;">
               <div class="crayon-num" data-line="crayon-5768658459ef7529587794-1">
                1
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-5768658459ef7529587794-2">
                2
               </div>
               <div class="crayon-num" data-line="crayon-5768658459ef7529587794-3">
                3
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-5768658459ef7529587794-4">
                4
               </div>
               <div class="crayon-num" data-line="crayon-5768658459ef7529587794-5">
                5
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-5768658459ef7529587794-6">
                6
               </div>
               <div class="crayon-num" data-line="crayon-5768658459ef7529587794-7">
                7
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-5768658459ef7529587794-8">
                8
               </div>
               <div class="crayon-num" data-line="crayon-5768658459ef7529587794-9">
                9
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-5768658459ef7529587794-10">
                10
               </div>
               <div class="crayon-num" data-line="crayon-5768658459ef7529587794-11">
                11
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-5768658459ef7529587794-12">
                12
               </div>
               <div class="crayon-num" data-line="crayon-5768658459ef7529587794-13">
                13
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-5768658459ef7529587794-14">
                14
               </div>
               <div class="crayon-num" data-line="crayon-5768658459ef7529587794-15">
                15
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-5768658459ef7529587794-16">
                16
               </div>
               <div class="crayon-num" data-line="crayon-5768658459ef7529587794-17">
                17
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-5768658459ef7529587794-18">
                18
               </div>
               <div class="crayon-num" data-line="crayon-5768658459ef7529587794-19">
                19
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-5768658459ef7529587794-20">
                20
               </div>
               <div class="crayon-num" data-line="crayon-5768658459ef7529587794-21">
                21
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-5768658459ef7529587794-22">
                22
               </div>
               <div class="crayon-num" data-line="crayon-5768658459ef7529587794-23">
                23
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-5768658459ef7529587794-24">
                24
               </div>
               <div class="crayon-num" data-line="crayon-5768658459ef7529587794-25">
                25
               </div>
               <div class="crayon-num crayon-striped-num" data-line="crayon-5768658459ef7529587794-26">
                26
               </div>
              </div>
             </td>
             <td class="crayon-code">
              <div class="crayon-pre" style="font-size: 12px !important; line-height: 15px !important; -moz-tab-size:4; -o-tab-size:4; -webkit-tab-size:4; tab-size:4;">
               <div class="crayon-line" id="crayon-5768658459ef7529587794-1">
                <span class="crayon-p">
                 #slave.py
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-5768658459ef7529587794-2">
               </div>
               <div class="crayon-line" id="crayon-5768658459ef7529587794-3">
                <span class="crayon-v">
                 current_url
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 request_from_master
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-5768658459ef7529587794-4">
                <span class="crayon-v">
                 to_send
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-sy">
                 [
                </span>
                <span class="crayon-sy">
                 ]
                </span>
               </div>
               <div class="crayon-line" id="crayon-5768658459ef7529587794-5">
                <span class="crayon-st">
                 for
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 next_url
                </span>
                <span class="crayon-st">
                 in
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 extract_urls
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 current_url
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-5768658459ef7529587794-6">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 to_send
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 append
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 next_url
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line" id="crayon-5768658459ef7529587794-7">
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-5768658459ef7529587794-8">
                <span class="crayon-e">
                 store
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 current_url
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 ;
                </span>
               </div>
               <div class="crayon-line" id="crayon-5768658459ef7529587794-9">
                <span class="crayon-e">
                 send_to_master
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 to_send
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-5768658459ef7529587794-10">
               </div>
               <div class="crayon-line" id="crayon-5768658459ef7529587794-11">
                <span class="crayon-p">
                 #master.py
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-5768658459ef7529587794-12">
                <span class="crayon-v">
                 distributed_queue
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 DistributedQueue
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line" id="crayon-5768658459ef7529587794-13">
                <span class="crayon-v">
                 bf
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 BloomFilter
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-5768658459ef7529587794-14">
               </div>
               <div class="crayon-line" id="crayon-5768658459ef7529587794-15">
                <span class="crayon-v">
                 initial_pages
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 =
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-s">
                 "www.renmingribao.com"
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-5768658459ef7529587794-16">
               </div>
               <div class="crayon-line" id="crayon-5768658459ef7529587794-17">
                <span class="crayon-st">
                 while
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-t">
                 True
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-5768658459ef7529587794-18">
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 if
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 request
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 ==
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-s">
                 'GET'
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line" id="crayon-5768658459ef7529587794-19">
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 if
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 distributed_queue
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 size
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-o">
                 &gt;
                </span>
                <span class="crayon-cn">
                 0
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-5768658459ef7529587794-20">
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 send
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 distributed_queue
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 get
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-sy">
                 )
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line" id="crayon-5768658459ef7529587794-21">
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 else
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-5768658459ef7529587794-22">
                <span class="crayon-h">
                </span>
                <span class="crayon-st">
                 break
                </span>
               </div>
               <div class="crayon-line" id="crayon-5768658459ef7529587794-23">
                <span class="crayon-h">
                </span>
                <span class="crayon-e">
                 elif
                </span>
                <span class="crayon-v">
                 request
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-o">
                 ==
                </span>
                <span class="crayon-h">
                </span>
                <span class="crayon-s">
                 'POST'
                </span>
                <span class="crayon-o">
                 :
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-5768658459ef7529587794-24">
                <span class="crayon-h">
                </span>
                <span class="crayon-v">
                 bf
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-e">
                 put
                </span>
                <span class="crayon-sy">
                 (
                </span>
                <span class="crayon-v">
                 request
                </span>
                <span class="crayon-sy">
                 .
                </span>
                <span class="crayon-v">
                 url
                </span>
                <span class="crayon-sy">
                 )
                </span>
               </div>
               <div class="crayon-line" id="crayon-5768658459ef7529587794-25">
                <span class="crayon-h">
                </span>
               </div>
               <div class="crayon-line crayon-striped-line" id="crayon-5768658459ef7529587794-26">
               </div>
              </div>
             </td>
            </tr>
           </tbody>
          </table>
         </div>
        </div>
        <!-- [Format Time: 0.0048 seconds] -->
       </div>
       <p>
        好的，其实你能想到，有人已经给你写好了你需要的：
        <a class=" wrap external" href="https://github.com/darkrho/scrapy-redis" rel="nofollow noreferrer" target="_blank">
         darkrho/scrapy-redis · GitHub
         <i class="icon-external">
         </i>
        </a>
       </p>
       <p>
        4）展望及后处理
        <br/>
        虽然上面用了很多“简单”，但是真正要实现一个商业规模可用的爬虫并不是一件容易的事。上面的代码用来爬一个整体的网站几乎没有太大的问题。
       </p>
       <p>
        但是如果附加上你需要这些后续处理，比如
       </p>
       <ol>
        <li>
         有效地存储（数据库应该怎样安排）
        </li>
        <li>
         有效地判重（这里指网页判重，咱可不想把人民日报和抄袭它的大民日报都爬一遍）
        </li>
        <li>
         有效地信息抽取（比如怎么样把网页上所有的地址抽取出来，如“朝阳区奋进路中华道”），搜索引擎通常不需要存储所有的信息，比如图片我存来干嘛…
        </li>
        <li>
         及时更新（预测这个网页多久会更新一次）
        </li>
       </ol>
       <p>
        如你所想，这里每一个点都可以供很多研究者十数年的研究。虽然如此，
        <br/>
        “路漫漫其修远兮,吾将上下而求索”。
       </p>
       <p>
        所以，不要问怎么入门，直接上路就好了：）
       </p>
       <p>
       </p>
       <p>
        知乎原帖》》http://www.zhihu.com/question/20899988
       </p>
      </div>
      <div>
       <strong>
        注：转载文章均来自于公开网络，仅供学习使用，不会用于任何商业用途，如果侵犯到原作者的权益，请您与我们联系删除或者授权事宜，联系邮箱：contact@dataunion.org。转载数盟网站文章请注明原文章作者，否则产生的任何版权纠纷与数盟无关。
       </strong>
      </div>
      <!--content_text-->
      <div class="fenxian">
       <!-- JiaThis Button BEGIN -->
       <div class="jiathis_style_32x32">
        <p class="jiathis_button_weixin">
        </p>
        <p class="jiathis_button_tsina">
        </p>
        <p class="jiathis_button_qzone">
        </p>
        <p class="jiathis_button_cqq">
        </p>
        <p class="jiathis_button_tumblr">
        </p>
        <a class="jiathis jiathis_txt jtico jtico_jiathis" href="http://www.jiathis.com/share" target="_blank">
        </a>
        <p class="jiathis_counter_style">
        </p>
       </div>
       <!-- JiaThis Button END -->
      </div>
     </article>
     <!--content-->
     <!--相关文章-->
     <div class="xianguan">
      <div class="xianguantitle">
       相关文章！
      </div>
      <ul class="pic">
       <li>
        <a href="http://dataunion.org/20820.html">
         <img src="http://dataunion.org/wp-content/uploads/2015/09/1-300x200.jpg"/>
        </a>
        <a class="link" href="http://dataunion.org/20820.html" rel="bookmark" title="人们对Python在企业级开发中的10大误解">
         人们对Python在企业级开发中的10大误解
        </a>
       </li>
       <li>
        <a href="http://dataunion.org/20587.html">
         <img src="http://dataunion.org/wp-content/uploads/2015/08/t0192a35f52bc6e5eab-300x188.jpg"/>
        </a>
        <a class="link" href="http://dataunion.org/20587.html" rel="bookmark" title="基于Python的卷积神经网络和特征提取">
         基于Python的卷积神经网络和特征提取
        </a>
       </li>
       <li>
        <a href="http://dataunion.org/20577.html">
         <img src="http://dataunion.org/wp-content/uploads/2015/08/t01393a74373db553ec-300x206.jpg"/>
        </a>
        <a class="link" href="http://dataunion.org/20577.html" rel="bookmark" title="八大工具，透析Python数据生态圈最新趋势！">
         八大工具，透析Python数据生态圈最新趋势！
        </a>
       </li>
       <li>
        <a href="http://dataunion.org/20544.html">
         <img src="http://dataunion.org/wp-content/uploads/2015/08/t01f6b96e9cd9bae4d9_副本1-300x178.png"/>
        </a>
        <a class="link" href="http://dataunion.org/20544.html" rel="bookmark" title="python有哪些好的学习资料或者博客？">
         python有哪些好的学习资料或者博客？
        </a>
       </li>
      </ul>
     </div>
     <!--相关文章-->
     <div class="comment" id="comments">
      <!-- You can start editing here. -->
      <!-- If comments are open, but there are no comments. -->
      <div class="title">
       期待你一针见血的评论，Come on！
      </div>
      <div id="respond">
       <p>
        不用想啦，马上
        <a href="http://dataunion.org/wp-login.php?redirect_to=http%3A%2F%2Fdataunion.org%2F14510.html">
         "登录"
        </a>
        发表自己的想法.
       </p>
      </div>
     </div>
     <!-- .nav-single -->
    </div>
    <!--Container End-->
    <aside id="sitebar">
     <!-- Sidebar widget: a list of external site links.
          The visible title reads 热门标签+ ("popular tags +"), but the menu id URL-decodes to
          menu-友情链接 ("friend links") and every item links to an external domain. -->
     <div class="sitebar_list2">
      <div class="wptag">
       <span class="tagtitle">
        热门标签+
       </span>
       <div class="tagg">
        <ul class="menu" id="menu-%e5%8f%8b%e6%83%85%e9%93%be%e6%8e%a5">
         <li class="menu-item menu-item-type-custom menu-item-object-custom menu-item-1605" id="menu-item-1605">
          <a href="http://taidizh.com/">
           泰迪智慧
          </a>
         </li>
         <li class="menu-item menu-item-type-custom menu-item-object-custom menu-item-20884" id="menu-item-20884">
          <a href="http://www.transwarp.cn/">
           星环科技
          </a>
         </li>
         <li class="menu-item menu-item-type-custom menu-item-object-custom menu-item-3538" id="menu-item-3538">
          <a href="http://datall.org/">
           珈和遥感
          </a>
         </li>
         <li class="menu-item menu-item-type-custom menu-item-object-custom menu-item-20888" id="menu-item-20888">
          <a href="http://www.chinahadoop.cn/">
           小象学院
          </a>
         </li>
        </ul>
       </div>
      </div>
     </div>
     <!-- Sidebar widget: centered banner image linking to a course page on study.163.com.
          The link opens in a new tab, so rel="noopener" keeps the opened page from reaching window.opener.
          NOTE(review): the alt text is derived from the link host, not from the banner artwork; confirm the wording. -->
     <div class="sitebar_list">
      <div class="textwidget">
       <div align="center">
        <a href="http://study.163.com/course/courseMain.htm?courseId=991022" rel="noopener" target="_blank">
         <img alt="网易云课堂课程" src="http://dataunion.org/wp-content/uploads/2016/03/dv.jpg"/>
        </a>
       </div>
      </div>
     </div>
     <!-- Sidebar widget: category cloud (文章分类).
          Each link points at a /category/ archive. Its title attribute gives the topic count (N个话题),
          and the inline font-size scales with that count: 8pt for 1 topic up to 22pt for 693 topics. -->
     <div class="sitebar_list">
      <h4 class="sitebar_title">
       文章分类
      </h4>
      <div class="tagcloud">
       <a class="tag-link-44" href="http://dataunion.org/category/industry/demo" style="font-size: 10.204724409449pt;" title="4个话题">
        Demo展示
       </a>
       <a class="tag-link-31" href="http://dataunion.org/category/experts" style="font-size: 15.826771653543pt;" title="52个话题">
        专家团队
       </a>
       <a class="tag-link-870" href="http://dataunion.org/category/tech/ai" style="font-size: 19.795275590551pt;" title="273个话题">
        人工智能
       </a>
       <a class="tag-link-488" href="http://dataunion.org/category/%e5%8a%a0%e5%85%a5%e6%95%b0%e7%9b%9f" style="font-size: 8pt;" title="1个话题">
        加入数盟
       </a>
       <a class="tag-link-869" href="http://dataunion.org/category/tech/viz" style="font-size: 17.204724409449pt;" title="93个话题">
        可视化
       </a>
       <a class="tag-link-30" href="http://dataunion.org/category/partners" style="font-size: 10.645669291339pt;" title="5个话题">
        合作伙伴
       </a>
       <a class="tag-link-889" href="http://dataunion.org/category/parterc" style="font-size: 11.582677165354pt;" title="8个话题">
        合作会议
       </a>
       <a class="tag-link-104" href="http://dataunion.org/category/books" style="font-size: 12.96062992126pt;" title="15个话题">
        图书
       </a>
       <a class="tag-link-220" href="http://dataunion.org/category/tech/base" style="font-size: 19.850393700787pt;" title="281个话题">
        基础架构
       </a>
       <a class="tag-link-219" href="http://dataunion.org/category/tech/analysis" style="font-size: 19.409448818898pt;" title="232个话题">
        数据分析
       </a>
       <a class="tag-link-887" href="http://dataunion.org/category/tech/dm" style="font-size: 13.291338582677pt;" title="17个话题">
        数据挖掘
       </a>
       <a class="tag-link-34" href="http://dataunion.org/category/tech" style="font-size: 20.732283464567pt;" title="404个话题">
        文章
       </a>
       <a class="tag-link-1" href="http://dataunion.org/category/uncategorized" style="font-size: 22pt;" title="693个话题">
        未分类
       </a>
       <a class="tag-link-4" href="http://dataunion.org/category/events" style="font-size: 14.503937007874pt;" title="29个话题">
        活动
       </a>
       <a class="tag-link-890" href="http://dataunion.org/category/tech/%e6%b7%b1%e5%ba%a6%e5%ad%a6%e4%b9%a0" style="font-size: 10.204724409449pt;" title="4个话题">
        深度学习
       </a>
       <a class="tag-link-221" href="http://dataunion.org/category/tech/devl" style="font-size: 18.968503937008pt;" title="193个话题">
        编程语言
       </a>
       <a class="tag-link-888" href="http://dataunion.org/category/career" style="font-size: 15.661417322835pt;" title="48个话题">
        职业规划
       </a>
       <a class="tag-link-5" href="http://dataunion.org/category/jobs" style="font-size: 14.11811023622pt;" title="25个话题">
        职位
       </a>
       <a class="tag-link-871" href="http://dataunion.org/category/industry" style="font-size: 15.716535433071pt;" title="49个话题">
        行业
       </a>
       <a class="tag-link-613" href="http://dataunion.org/category/industry/case" style="font-size: 16.984251968504pt;" title="84个话题">
        行业应用
       </a>
       <a class="tag-link-885" href="http://dataunion.org/category/industry/news" style="font-size: 17.425196850394pt;" title="102个话题">
        行业资讯
       </a>
       <a class="tag-link-10" href="http://dataunion.org/category/training" style="font-size: 14.228346456693pt;" title="26个话题">
        课程
       </a>
       <a class="tag-link-16" href="http://dataunion.org/category/sources" style="font-size: 15.661417322835pt;" title="48个话题">
        资源
       </a>
      </div>
     </div>
     <!-- Sidebar widget: site functions (功能).
          Links, in order: register, log in, article RSS feed, comment RSS feed, and WordPress.org. -->
     <div class="sitebar_list">
      <h4 class="sitebar_title">
       功能
      </h4>
      <ul>
       <li>
        <a href="http://dataunion.org/wp-login.php?action=register">
         注册
        </a>
       </li>
       <li>
        <a href="http://dataunion.org/wp-login.php">
         登录
        </a>
       </li>
       <li>
        <a href="http://dataunion.org/feed">
         文章
         <abbr title="Really Simple Syndication">
          RSS
         </abbr>
        </a>
       </li>
       <li>
        <a href="http://dataunion.org/comments/feed">
         评论
         <abbr title="Really Simple Syndication">
          RSS
         </abbr>
        </a>
       </li>
       <li>
        <a href="https://cn.wordpress.org/" title="基于WordPress，一个优美、先进的个人信息发布平台。">
         WordPress.org
        </a>
       </li>
      </ul>
     </div>
    </aside>
    <div class="clear">
    </div>
   </div>
   <!--main-->
   <footer id="dibu">
    <!-- Footer "about" area.
         Right column: footer menu (the id URL-decodes to menu-底部菜单) and the site slogan.
         Left column: Weibo and WeChat icon links, plus the WeChat QR-code popup markup. -->
    <div class="about">
     <div class="right">
      <ul class="menu" id="menu-%e5%ba%95%e9%83%a8%e8%8f%9c%e5%8d%95">
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-18024" id="menu-item-18024">
        <a href="http://dataunion.org/category/partners">
         合作伙伴
        </a>
       </li>
       <li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-20881" id="menu-item-20881">
        <a href="http://dataunion.org/contribute">
         文章投稿
        </a>
       </li>
       <li class="menu-item menu-item-type-taxonomy menu-item-object-category menu-item-20872" id="menu-item-20872">
        <a href="http://dataunion.org/category/%e5%8a%a0%e5%85%a5%e6%95%b0%e7%9b%9f">
         加入数盟
        </a>
       </li>
       <li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-22441" id="menu-item-22441">
        <a href="http://dataunion.org/f-links">
         友情链接
        </a>
       </li>
       <li class="menu-item menu-item-type-post_type menu-item-object-page menu-item-20874" id="menu-item-20874">
        <a href="http://dataunion.org/aboutus">
         关于数盟
        </a>
       </li>
      </ul>
      <p class="banquan">
       数盟社区        ，
        做最棒的数据科学社区
      </p>
     </div>
     <div class="left">
      <ul class="bottomlist">
       <li>
        <!-- The href no longer has trailing spaces. The original empty title attribute was preceded
             by a full-width space (U+3000), which HTML does not treat as an attribute separator, so it
             parsed as a bogus attribute name; it is dropped here. rel="noopener" guards the new-tab link. -->
        <a href="http://weibo.com/DataScientistUnion" rel="noopener" target="_blank">
         <img alt="新浪微博" src="http://dataunion.org/wp-content/themes/yzipi/images/weibo.png"/>
        </a>
       </li>
       <li>
        <!-- Icon-only link: the alt text supplies its accessible name. -->
        <a class="cd-popup-trigger" href="http://dataunion.org/14510.html#0">
         <img alt="微信公众号" src="http://dataunion.org/wp-content/themes/yzipi/images/weixin.png"/>
        </a>
       </li>
      </ul>
      <div class="cd-popup">
       <div class="cd-popup-container">
        <h1>
         扫描二维码,加微信公众号
        </h1>
        <img alt="微信公众号二维码" src="http://dataunion.org/wp-content/themes/yzipi/images/2014-12-06-1515289049.png"/>
        <!-- The close link has no text content, so aria-label gives it an accessible name. -->
        <a aria-label="关闭" class="cd-popup-close" href="http://dataunion.org/14510.html">
        </a>
       </div>
       <!-- cd-popup-container -->
      </div>
      <!-- cd-popup -->
     </div>
    </div>
    <!--about-->
    <!-- Footer bottom bar: home link, ICP registration link, contact e-mail, an empty
         statistics placeholder (div.tongji), and the initially hidden #scroll element. -->
    <div class="bottom">
     <a href="http://dataunion.org/">
      数盟社区
     </a>
     <!-- New-tab external link: noopener added alongside the existing rel tokens. -->
     <a href="http://www.miitbeian.gov.cn/" rel="external nofollow noopener" target="_blank">
      京ICP备14026740号
     </a>
     联系我们：
     <!-- target="_blank" removed: a mailto: link hands off to the mail client, and a new-tab
          target can leave an empty browser tab behind. -->
     <a href="mailto:contact@dataunion.org">
      contact@dataunion.org
     </a>
     <div class="tongji">
     </div>
     <!-- #scroll starts hidden via its inline style.
          NOTE(review): presumably shown and handled by a script outside this section; confirm. -->
     <div class="scroll" id="scroll" style="display:none;">
      ︿
     </div>
    </div>
   </footer>
   <!--dibu-->
  </div>
 </body>
</html>